In [6]:
import graphlab
import os
In [21]:
# Working directory that holds the course data files for this notebook.
# A raw string is required here: in a normal literal the "\U" in "C:\Users"
# is parsed as the start of a unicode escape and is a SyntaxError on Python 3.
# NOTE(review): this is a machine-specific absolute path; anyone else running
# the notebook must edit it (or start the kernel from the data directory).
path = r'C:\Users\Matheus\Documents\GitHub\UW-Machine-Learning-Specialization\Week 3'
# Later cells open data files by bare file name, so switch into that directory.
os.chdir(path)
In [24]:
# Load the reviews CSV into an SFrame. The file name is relative, so it is
# resolved against the current working directory.
reviews_csv = "amazon_baby.csv"
products = graphlab.SFrame(reviews_csv)
In [25]:
# Preview the first rows of the products SFrame to sanity-check the load.
products.head()
Out[25]:
In [27]:
# Add a 'word_count' column: one word-count result per review, computed by
# GraphLab's text-analytics helper from the raw 'review' text column.
review_text = products['review']
products['word_count'] = graphlab.text_analytics.count_words(review_text)
In [28]:
# Preview the table again to confirm the new 'word_count' column is present.
products.head()
Out[28]:
In [29]:
# Point GraphLab Canvas at the 'ipynb' target so that subsequent .show() calls
# are directed to the notebook (presumably rendered inline — confirm against
# the GraphLab Canvas docs).
graphlab.canvas.set_target('ipynb')
In [30]:
# Visualize the 'name' column of the reviews table in Canvas.
products['name'].show()
In [31]:
# Keep only the reviews whose product name exactly matches the giraffe teether.
is_giraffe = products['name'] == 'Vulli Sophie the Giraffe Teether'
giraffe_reviews = products[is_giraffe]
In [32]:
# Number of reviews selected for the giraffe teether.
len(giraffe_reviews)
Out[32]:
In [33]:
# Show the giraffe teether's 'rating' column using the Categorical view.
giraffe_reviews['rating'].show(view='Categorical')
In [34]:
# Show the 'rating' column for all products using the Categorical view.
products['rating'].show(view='Categorical')
In [36]:
# Drop every review rated exactly 3 stars; only the remaining rows are kept.
# NOTE: this rebinds `products`, so the unfiltered table is no longer
# reachable under that name after this cell runs.
not_three_stars = products['rating'] != 3
products = products[not_three_stars]
In [37]:
# Binary target column: truthy when the rating is 4 or higher (a 4-star or
# 5-star review counts as positive sentiment), falsy otherwise.
is_positive = products['rating'] >= 4
products['sentiment'] = is_positive
In [38]:
# Preview the table to check the newly added 'sentiment' column.
products.head()
Out[38]:
In [39]:
# Randomly split the labelled reviews with a 0.8 fraction for the first part
# (train_data) and the remainder for test_data. The seed is fixed so the
# split is repeatable across runs.
train_data, test_data = products.random_split(0.8, seed=0)
In [40]:
# Fit a logistic classifier that predicts 'sentiment' using only the
# 'word_count' column as its feature.
# NOTE(review): test_data is also passed as the validation set, so the
# validation metrics reported during training are computed on the same rows
# that are later used for evaluation.
sentiment_model = graphlab.logistic_classifier.create(
    train_data,
    target='sentiment',
    features=['word_count'],
    validation_set=test_data
)
In [41]:
# Evaluate the trained classifier on test_data, requesting the ROC curve metric.
sentiment_model.evaluate(test_data, metric='roc_curve')
Out[41]:
In [43]:
# Open the model's 'Evaluation' view in Canvas.
sentiment_model.show(view='Evaluation')
In [44]:
# Score every giraffe review with the trained model. Requesting
# output_type='probability' asks for probabilities instead of class labels.
giraffe_probabilities = sentiment_model.predict(giraffe_reviews, output_type='probability')
giraffe_reviews['predicted_sentiment'] = giraffe_probabilities
In [45]:
# Preview the giraffe reviews together with their 'predicted_sentiment' column.
giraffe_reviews.head()
Out[45]:
In [ ]: